/* ========================================================= */
/*                      IMPORT PACKAGE DATA                  */
/* ========================================================= */
clear all

************************************************************************************
*
*	DESCRIPTION:
*   This file performs matching procedures on sub-samples.
*   FinTech vs bank borrowers
*
*   REQUIREMENTS (as used further down in this file):
*   - the global macros output and input must already hold folder paths;
*     they are referenced below but not defined in the part of the file
*     reviewed here
*   - the commands psmatch2 and distinct are called below; both appear to be
*     user-written add-ons (TODO confirm they are installed before running)
*
************************************************************************************

/* ========================================================= */
/*                DEFINE MATCHING VARIABLES                  */
/* ========================================================= */
/* Covariate lists passed to psmatch2 after the loan0-loan5 regressors.    */
/* xlist           : full list, including i.rating and the m_ indicators   */
/* xlist_nomissing : same continuous covariates without i.rating and the   */
/*                   m_ indicators (not referenced in the part of the file */
/*                   reviewed here)                                        */
/* xlistworating   : xlist without i.rating                                */
global xlist "outsideloan outloanyear i.codummy ageatevent i.rating r3filled i.m_r3filled r13filled i.m_r13filled r_collateral_filled i.m_r_collateral_filled"
global xlist_nomissing "outsideloan outloanyear i.codummy ageatevent r3filled r13filled r_collateral_filled"
global xlistworating "outsideloan outloanyear i.codummy ageatevent r3filled i.m_r3filled r13filled i.m_r13filled r_collateral_filled i.m_r_collateral_filled"

/* ylist is reshaped to wide form (one column per month of the window);   */
/* celllist defines the cells inside which matching is run.               */
global ylist "loan"  // Time-variant matching variables
global celllist "industry size"  // Industry-size matching cells

/* ========================================================= */
/*               FORINVESTMENT == 1 MATCHING                 */
/* ========================================================= */

/* Load the firm-month panel and build the firm-level inputs for matching */
use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

/* Sample window: mdate strictly after 2013m12 and strictly before 2020m1 */
keep if mdate > tm(2013m12) & mdate < tm(2020m1)

/* Counts of distinct P2P firms: with an empty loantype, then overall */
distinct siren if loantype == "" & p2p == 1
distinct siren if p2p == 1

/* Retain non-P2P rows with forinvestment == 1, plus every row whose
   loantype is MA or IM */
keep if (forinvestment == 1 & p2p == 0) | inlist(loantype, "MA", "IM")

/* loan = log(1 + sum of the five loan components) */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* For each ratio: m_ indicator of a system-missing value, then zero-fill */
foreach v of varlist r3filled r13filled r5filled r_collateral_filled {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* Firm-level flag: codummy = 1 when co summed over rows with delta < 0
   is positive */
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

/* One row per firm taken at delta == -11, joined with codummy; only firms
   present in both files are kept */
preserve
    keep if delta == -11
    keep siren outloanyear size r5filled m_r5filled rating industry ageatevent ///
         r3filled m_r3filled r13filled m_r13filled r_collateral_filled ///
         m_r_collateral_filled outsideloan
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Restrict to the window delta in [-6, 0] and shift it to delta2 in [0, 6]
   so the month index can be used as a reshape suffix. */
drop datemonth
keep if delta <= 0 & delta >= -6
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren p2p delta2 $celllist $ylist

/* Diagnostic only: after the duplicates drop above every siren-delta2 pair
   occurs once, so nb should be 1 throughout. */
sort siren delta2
bys siren delta2: gen nb = _N
su nb, d
drop nb

/* One row per firm: loan is spread into one column per delta2 value
   (loan0, loan1, ...). */
qui reshape wide $ylist, i(siren p2p $celllist) j(delta2)

/* Attach the firm-level covariates saved in psm_stable_keep.dta; firms
   found in only one of the two files are dropped. */
merge 1:m siren using "$output\psm_stable_keep.dta"
keep if _merge == 3
drop _merge

/* Recode rating and industry as consecutive integer codes 1, 2, ...
   FIX: this used to read "replace rating = group(rating)". group() builds
   category codes only when called through egen, so rating now gets the same
   egen-based recoding as industry.
   NOTE(review): this can change the matched sample; confirm against the
   intended results. */
egen ratingg = group(rating)
drop rating
rename ratingg rating
egen indg = group(industry)
drop industry
rename indg industry

/* ========================================================= */
/*               PERFORM K-NEAREST NEIGHBOR MATCHING         */
/* ========================================================= */
/* Remove any leftover variables whose names start with an underscore
   (psmatch2 writes its output into such variables). */
capture drop _*

/* Stop early when psmatch2 is unavailable. Without this check the capture
   below would hide the error, every cell would be skipped, and an empty
   matched sample would be saved without any warning. */
capture which psmatch2
if _rc {
    display as error "psmatch2 not found; install it with: ssc install psmatch2"
    exit 199
}

/* Holders that accumulate the per-cell matching output */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

/* Run the matching separately inside each cell of the celllist variables.
   loan0-loan5 enter as covariates, loan6 is the outcome. */
egen cell = group($celllist)
quietly levelsof cell, local(gr)
foreach i of local gr {
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
        if cell == `i', out(loan6) neighbor(5) common
    /* store the return code at once, before another command resets it */
    local rc = _rc

    if `rc' == 0 {
        display "Processing cell `i'"
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }
    }
    else {
        /* report the actual return code instead of assuming the cause */
        display "Skipping cell `i' (psmatch2 returned r(`rc'))"
    }
}

/* ========================================================= */
/*                  SAVE MATCHED SAMPLE                      */
/* ========================================================= */
/* Keep the matching output only for firms that received a weight */
keep siren pscore support weight id n1-n5 nn
drop if weight == .
summarize pscore support weight id n1 nn, detail

/* Bring back every panel row of the retained firms */
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta", ///
    keep(match) nogenerate

save "$output\matched_sample_nn_forinvestment_1.dta", replace
	
	
/* ========================================================= */
/*                FORINVESTMENT == 0 MATCHING                */
/* ========================================================= */

/* Load the firm-month panel and build the firm-level inputs for matching */
use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

/* Sample window: mdate strictly after 2013m12 and strictly before 2020m1 */
keep if mdate > tm(2013m12) & mdate < tm(2020m1)

/* Retain rows with forinvestment == 0, plus every row whose loantype is
   non-empty and is neither MA nor IM */
keep if forinvestment == 0 | !inlist(loantype, "MA", "IM", "")

/* loan = log(1 + sum of the five loan components) */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* For each variable: m_ indicator of a system-missing value, then zero-fill */
foreach v of varlist outsideloan r3filled r13filled r5filled r_collateral_filled {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* Firm-level flag: codummy = 1 when co summed over rows with delta < 0
   is positive */
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

/* One row per firm taken at delta == -11, joined with codummy; only firms
   present in both files are kept */
preserve
    keep if delta == -11
    keep siren outloanyear size r5filled m_r5filled rating industry ageatevent ///
         r3filled m_r3filled r13filled m_r13filled r_collateral_filled ///
         m_r_collateral_filled outsideloan
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Restrict to the window delta in [-6, 0] and shift it to delta2 in [0, 6]
   so the month index can be used as a reshape suffix. */
drop datemonth
keep if delta <= 0 & delta >= -6
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren p2p delta2 $celllist $ylist

/* Diagnostic only: after the duplicates drop above every siren-delta2 pair
   occurs once, so nb should be 1 throughout. */
sort siren delta2
bys siren delta2: gen nb = _N
su nb, d
drop nb

/* One row per firm: loan is spread into one column per delta2 value
   (loan0, loan1, ...). */
qui reshape wide $ylist, i(siren p2p $celllist) j(delta2)

/* Attach the firm-level covariates saved in psm_stable_keep.dta; firms
   found in only one of the two files are dropped. */
merge 1:m siren using "$output\psm_stable_keep.dta"
keep if _merge == 3
drop _merge

/* Recode rating and industry as consecutive integer codes 1, 2, ...
   FIX: this used to read "replace rating = group(rating)". group() builds
   category codes only when called through egen, so rating now gets the same
   egen-based recoding as industry.
   NOTE(review): this can change the matched sample; confirm against the
   intended results. */
egen ratingg = group(rating)
drop rating
rename ratingg rating
egen indg = group(industry)
drop industry
rename indg industry

/* ========================================================= */
/*               PERFORM K-NEAREST NEIGHBOR MATCHING         */
/* ========================================================= */
/* Remove any leftover variables whose names start with an underscore
   (psmatch2 writes its output into such variables). */
capture drop _*

/* Stop early when psmatch2 is unavailable. Without this check the capture
   below would hide the error, every cell would be skipped, and an empty
   matched sample would be saved without any warning. */
capture which psmatch2
if _rc {
    display as error "psmatch2 not found; install it with: ssc install psmatch2"
    exit 199
}

/* Holders that accumulate the per-cell matching output */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

/* Run the matching separately inside each cell of the celllist variables.
   loan0-loan5 enter as covariates, loan6 is the outcome. */
egen cell = group($celllist)
quietly levelsof cell, local(gr)
foreach i of local gr {
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
        if cell == `i', out(loan6) neighbor(5) common
    /* store the return code at once, before another command resets it */
    local rc = _rc

    if `rc' == 0 {
        display "Processing cell `i'"
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }
    }
    else {
        /* report the actual return code instead of assuming the cause */
        display "Skipping cell `i' (psmatch2 returned r(`rc'))"
    }
}

/* ========================================================= */
/*                  SAVE MATCHED SAMPLE                      */
/* ========================================================= */
/* Keep the matching output only for firms that received a weight */
keep siren pscore support weight id n1-n5 nn
drop if weight == .
summarize pscore support weight id n1 nn, detail

/* Bring back every panel row of the retained firms */
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta", ///
    keep(match) nogenerate

save "$output\matched_sample_nn_forinvestment_0.dta", replace
	
	
/* ========================================================= */
/*                  RATED MATCHING                      */
/* ========================================================= */

/* Load the firm-month panel and build the firm-level inputs for matching */
use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

/* Sample window: mdate strictly after 2013m12 and strictly before 2020m1 */
keep if mdate > tm(2013m12) & mdate < tm(2020m1)

/* String copy of rating, used for the sub-sample filter below */
tostring rating, generate(ratingstr) force

/* rating_pre = the firm's rating string at deltamonths == -11, copied to
   all of its rows. Rows outside that month contribute an empty string, so
   a firm with no row at deltamonths == -11 gets an empty rating_pre. */
gen rating_m11 = ratingstr if deltamonths == -11
bysort siren (rating_m11): gen rating_pre = rating_m11[_N]
drop rating_m11

/* Drop firms whose prior rating is 1, 10, 11, 12 or 13. Firms with an
   empty rating_pre are not in that list and therefore stay in. */
drop if inlist(rating_pre, "1", "10", "11", "12", "13")

/* loan = log(1 + sum of the five loan components) */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* For each variable: m_ indicator of a system-missing value, then zero-fill */
foreach v of varlist outsideloan r3filled r13filled r5filled r_collateral_filled {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* Firm-level flag: codummy = 1 when co summed over rows with delta < 0
   is positive */
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

/* One row per firm taken at delta == -11, joined with codummy; only firms
   present in both files are kept */
preserve
    keep if delta == -11
    keep siren outloanyear size r5filled m_r5filled rating industry ageatevent ///
         r3filled m_r3filled r13filled m_r13filled r_collateral_filled ///
         m_r_collateral_filled outsideloan
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Restrict to the window delta in [-6, 0] and shift it to delta2 in [0, 6]
   so the month index can be used as a reshape suffix. */
drop datemonth
keep if delta <= 0 & delta >= -6
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren p2p delta2 $celllist $ylist

/* Diagnostic only: after the duplicates drop above every siren-delta2 pair
   occurs once, so nb should be 1 throughout. */
sort siren delta2
bys siren delta2: gen nb = _N
su nb, d
drop nb

/* One row per firm: loan is spread into one column per delta2 value
   (loan0, loan1, ...). */
qui reshape wide $ylist, i(siren p2p $celllist) j(delta2)

/* Attach the firm-level covariates saved in psm_stable_keep.dta; firms
   found in only one of the two files are dropped. */
merge 1:m siren using "$output\psm_stable_keep.dta"
keep if _merge == 3
drop _merge

/* Recode rating and industry as consecutive integer codes 1, 2, ...
   FIX: this used to read "replace rating = group(rating)". group() builds
   category codes only when called through egen, so rating now gets the same
   egen-based recoding as industry.
   NOTE(review): this can change the matched sample; confirm against the
   intended results. */
egen ratingg = group(rating)
drop rating
rename ratingg rating
egen indg = group(industry)
drop industry
rename indg industry

/* ========================================================= */
/*               PERFORM K-NEAREST NEIGHBOR MATCHING         */
/* ========================================================= */
/* Remove any leftover variables whose names start with an underscore
   (psmatch2 writes its output into such variables). */
capture drop _*

/* Stop early when psmatch2 is unavailable. Without this check the capture
   below would hide the error, every cell would be skipped, and an empty
   matched sample would be saved without any warning. */
capture which psmatch2
if _rc {
    display as error "psmatch2 not found; install it with: ssc install psmatch2"
    exit 199
}

/* Holders that accumulate the per-cell matching output */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

/* Run the matching separately inside each cell of the celllist variables.
   loan0-loan5 enter as covariates, loan6 is the outcome. */
egen cell = group($celllist)
quietly levelsof cell, local(gr)
foreach i of local gr {
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
        if cell == `i', out(loan6) neighbor(5) common
    /* store the return code at once, before another command resets it */
    local rc = _rc

    if `rc' == 0 {
        display "Processing cell `i'"
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }
    }
    else {
        /* report the actual return code instead of assuming the cause */
        display "Skipping cell `i' (psmatch2 returned r(`rc'))"
    }
}

/* ========================================================= */
/*                  SAVE MATCHED SAMPLE                      */
/* ========================================================= */
/* Keep the matching output only for firms that received a weight */
keep siren pscore support weight id n1-n5 nn
drop if weight == .
summarize pscore support weight id n1 nn, detail

/* Bring back every panel row of the retained firms */
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta", ///
    keep(match) nogenerate

/* Number of distinct firms on each side of the matched sample */
distinct siren if p2p == 1
distinct siren if p2p == 0

save "$output\matched_sample_nn_rating_234.dta", replace

/* ========================================================= */
/*                  UNRATED MATCHING                     */
/* ========================================================= */

/* Load the firm-month panel and build the firm-level inputs for matching */
use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

/* Sample window: mdate strictly after 2013m12 and strictly before 2020m1 */
keep if mdate > tm(2013m12) & mdate < tm(2020m1)

/* String copy of rating, used for the sub-sample filter below */
tostring rating, generate(ratingstr) force

/* rating_pre = the firm's rating string at deltamonths == -11, copied to
   all of its rows. Rows outside that month contribute an empty string, so
   a firm with no row at deltamonths == -11 gets an empty rating_pre. */
gen rating_m11 = ratingstr if deltamonths == -11
bysort siren (rating_m11): gen rating_pre = rating_m11[_N]
drop rating_m11

/* Keep firms whose prior rating is 1 or whose rating_pre is empty */
keep if inlist(rating_pre, "1", "")

/* loan = log(1 + sum of the five loan components) */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* For each variable: m_ indicator of a system-missing value, then zero-fill */
foreach v of varlist outsideloan r3filled r13filled r5filled r_collateral_filled {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* Firm-level flag: codummy = 1 when co summed over rows with delta < 0
   is positive */
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

/* One row per firm taken at delta == -11, joined with codummy; only firms
   present in both files are kept */
preserve
    keep if delta == -11
    keep siren outloanyear size r5filled m_r5filled rating industry ageatevent ///
         r3filled m_r3filled r13filled m_r13filled r_collateral_filled ///
         m_r_collateral_filled outsideloan
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Restrict to the window delta in [-6, 0] and shift it to delta2 in [0, 6]
   so the month index can be used as a reshape suffix. */
drop datemonth
keep if delta <= 0 & delta >= -6
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren p2p delta2 $celllist $ylist

/* Diagnostic only: after the duplicates drop above every siren-delta2 pair
   occurs once, so nb should be 1 throughout. */
sort siren delta2
bys siren delta2: gen nb = _N
su nb, d
drop nb

/* One row per firm: loan is spread into one column per delta2 value
   (loan0, loan1, ...). */
qui reshape wide $ylist, i(siren p2p $celllist) j(delta2)

/* Attach the firm-level covariates saved in psm_stable_keep.dta; firms
   found in only one of the two files are dropped. */
merge 1:m siren using "$output\psm_stable_keep.dta"
keep if _merge == 3
drop _merge

/* Recode rating and industry as consecutive integer codes 1, 2, ...
   FIX: this used to read "replace rating = group(rating)". group() builds
   category codes only when called through egen, so rating now gets the same
   egen-based recoding as industry.
   NOTE(review): this can change the matched sample; confirm against the
   intended results. */
egen ratingg = group(rating)
drop rating
rename ratingg rating
egen indg = group(industry)
drop industry
rename indg industry

/* ========================================================= */
/*               PERFORM K-NEAREST NEIGHBOR MATCHING         */
/* ========================================================= */
/* Remove any leftover variables whose names start with an underscore
   (psmatch2 writes its output into such variables). */
capture drop _*

/* Stop early when psmatch2 is unavailable. Without this check the capture
   below would hide the error, every cell would be skipped, and an empty
   matched sample would be saved without any warning. */
capture which psmatch2
if _rc {
    display as error "psmatch2 not found; install it with: ssc install psmatch2"
    exit 199
}

/* Holders that accumulate the per-cell matching output */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

/* Run the matching separately inside each cell of the celllist variables.
   loan0-loan5 enter as covariates, loan6 is the outcome. */
egen cell = group($celllist)
quietly levelsof cell, local(gr)
foreach i of local gr {
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
        if cell == `i', out(loan6) neighbor(5) common
    /* store the return code at once, before another command resets it */
    local rc = _rc

    if `rc' == 0 {
        display "Processing cell `i'"
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }
    }
    else {
        /* report the actual return code instead of assuming the cause */
        display "Skipping cell `i' (psmatch2 returned r(`rc'))"
    }
}

/* ========================================================= */
/*                  SAVE MATCHED SAMPLE                      */
/* ========================================================= */
/* Keep the matching output only for firms that received a weight */
keep siren pscore support weight id n1-n5 nn
drop if weight == .
summarize pscore support weight id n1 nn, detail

/* Bring back every panel row of the retained firms */
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta", ///
    keep(match) nogenerate

/* Number of distinct firms on each side of the matched sample */
distinct siren if p2p == 1
distinct siren if p2p == 0

save "$output\matched_sample_nn_rating_1.dta", replace

/* ========================================================= */
/*                     TABLE 6 MATCHING                      */
/* ========================================================= */

/* Load the firm-month panel */
use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

/* Sample window: mdate strictly after 2013m12 and strictly before 2020m1 */
keep if mdate > tm(2013m12) & mdate < tm(2020m1)

/* Restrict to firms that appear in liste_table6_p2p.dta */
merge m:1 siren using "$input\liste_table6_p2p.dta", keep(match) nogenerate

/* loan = log(1 + sum of the five loan components) */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* For each ratio: m_ indicator of a system-missing value, then zero-fill */
foreach v of varlist r3filled r13filled r5filled r_collateral_filled {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* Firm-level flag: codummy = 1 when co summed over rows with delta < 0
   is positive */
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

/* One row per firm taken at delta == -11 (rating is not carried along
   here), joined with codummy; only firms present in both files are kept */
preserve
    keep if delta == -11
    keep siren outloanyear size r5filled m_r5filled industry ageatevent ///
         r3filled m_r3filled r13filled m_r13filled r_collateral_filled ///
         m_r_collateral_filled outsideloan
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
/* Window delta in [-6, 0], re-indexed as delta2 in [0, 6] */
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren p2p delta2 $celllist $ylist

/* Diagnostic: rows per siren-delta2 pair (1 everywhere after the
   duplicates drop above) */
bysort siren delta2: gen nb = _N
summarize nb, detail
drop nb

/* Wide layout: one row per firm, one loan column per delta2 value */
quietly reshape wide $ylist, i(siren p2p $celllist) j(delta2)

/* Add the firm-level covariates; keep only firms found in both files */
merge 1:m siren using "$output\psm_stable_keep.dta", keep(match) nogenerate

/* Replace industry by consecutive integer codes */
egen industry_code = group(industry)
drop industry
rename industry_code industry

/* ========================================================= */
/*               PERFORM K-NEAREST NEIGHBOR MATCHING         */
/* ========================================================= */
/* Remove any leftover variables whose names start with an underscore
   (psmatch2 writes its output into such variables). */
capture drop _*

/* Stop early when psmatch2 is unavailable. Without this check the capture
   below would hide the error, every cell would be skipped, and an empty
   matched sample would be saved without any warning. */
capture which psmatch2
if _rc {
    display as error "psmatch2 not found; install it with: ssc install psmatch2"
    exit 199
}

/* Holders that accumulate the per-cell matching output */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

* We don't match on rating -> too restrictive given the sample size
/* Run the matching separately inside each cell of the celllist variables.
   loan0-loan5 enter as covariates, loan6 is the outcome. */
egen cell = group($celllist)
quietly levelsof cell, local(gr)
foreach i of local gr {
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlistworating ///
        if cell == `i', out(loan6) neighbor(5) common
    /* store the return code at once, before another command resets it */
    local rc = _rc

    if `rc' == 0 {
        display "Processing cell `i'"
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }
    }
    else {
        /* report the actual return code instead of assuming the cause */
        display "Skipping cell `i' (psmatch2 returned r(`rc'))"
    }
}

/* ========================================================= */
/*                   CHECK MATCHING RESULTS                  */
/* ========================================================= */
/* Distinct firms that received a weight, non-P2P first, then P2P */
distinct siren if p2p == 0 & weight != .
distinct siren if p2p == 1 & weight != .

/* ========================================================= */
/*                  SAVE MATCHED SAMPLE                      */
/* ========================================================= */
/* Keep the matching output only for firms that received a weight */
keep siren pscore support weight id n1-n5 nn
drop if weight == .
summarize pscore support weight id n1 nn, detail

/* Bring back every panel row of the retained firms */
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta", ///
    keep(match) nogenerate

save "$output\matched_sample_p2p_table6.dta", replace

/* ============================================================================= */
/*                       TABLE 6 - Investment (PSM Matching)                     */
/* ============================================================================= */

* ------------------------------------------------------------------------------
* Step 1: Load and filter dataset
* ------------------------------------------------------------------------------

use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

* Sample window: mdate strictly after 2013m12 and strictly before 2020m1
keep if mdate > tm(2013m12) & mdate < tm(2020m1)

* Restrict to firms that appear in liste_table6_p2p.dta
merge m:1 siren using "$input\liste_table6_p2p.dta", keep(match) nogenerate

* Keep non-P2P rows (p2p == 0) plus every row whose loantype is MA or IM
keep if p2p == 0 | inlist(loantype, "MA", "IM")

* loan = log(1 + sum of the five loan components)
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

* ------------------------------------------------------------------------------
* Step 2: Handle missing values in key financial variables
* ------------------------------------------------------------------------------

* For each ratio: m_ indicator of a system-missing value, then zero-fill
foreach v of varlist r3filled r13filled r5filled r_collateral_filled {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

* ------------------------------------------------------------------------------
* Step 3: Save auxiliary data for PSM
* ------------------------------------------------------------------------------

* Firm-level flag: codummy = 1 when co summed over rows with delta < 0 is positive
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

* One row per firm taken at delta == -11, joined with codummy
preserve
    keep if delta == -11
    keep siren outloanyear size r5filled m_r5filled industry ageatevent ///
	r3filled m_r3filled r13filled m_r13filled r_collateral_filled ///
	m_r_collateral_filled outsideloan
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_keep.dta", replace
restore

* ------------------------------------------------------------------------------
* Step 4: Reshape data for PSM
* ------------------------------------------------------------------------------

* Window delta in [-6, 0], re-indexed as delta2 in [0, 6]
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta

duplicates drop siren delta2, force
keep siren p2p delta2 $celllist $ylist

* Diagnostic: rows per siren-delta2 pair (1 everywhere after the drop above)
bysort siren delta2: gen nb = _N
summarize nb, detail
drop nb

* Wide layout: one loan column per delta2 value
quietly reshape wide $ylist, i(siren p2p $celllist) j(delta2)

* Diagnostic: rows per firm after the reshape, then force one row per firm
bysort siren: gen nb = _N
summarize nb, detail
drop nb

duplicates drop siren, force

* Add the firm-level covariates; keep only firms found in both files
merge 1:m siren using "$output\psm_stable_keep.dta", keep(match) nogenerate

* Replace industry by consecutive integer codes
egen industry_code = group(industry)
drop industry
rename industry_code industry

* ------------------------------------------------------------------------------
* Step 5: Propensity Score Matching (PSM) using k-nearest neighbor
* ------------------------------------------------------------------------------

* Remove leftover variables whose names start with an underscore
* (psmatch2 writes its output into such variables)
capture drop _*

* Stop early when psmatch2 is unavailable. Without this check the capture
* below would hide the error, every cell would be skipped, and an empty
* matched sample would be saved without any warning.
capture which psmatch2
if _rc {
    display as error "psmatch2 not found; install it with: ssc install psmatch2"
    exit 199
}

* Create placeholders for matching results
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn  {
    gen `name' = .
}

* Create group identifier for matching (one cell per celllist combination)
egen cell = group($celllist)
quietly levelsof cell, local(gr)

foreach i of local gr {

    * Perform PSM matching within each cell:
    * loan0-loan5 enter as covariates, loan6 is the outcome
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlistworating ///
        if cell == `i', out(loan6) neighbor(5) common
    * store the return code at once, before another command resets it
    local rc = _rc

    if `rc' == 0 {
        display "Processing matching for cell `i'..."

        * Store matching results
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }

    } else {
        * report the actual return code instead of assuming the cause
        display "Skipping cell `i' (psmatch2 returned r(`rc'))"
    }
}

* ------------------------------------------------------------------------------
* Step 6: Save matched sample for further analysis
* ------------------------------------------------------------------------------

* Keep the matching output only for firms that received a weight
keep siren pscore support weight id n1-n5 nn
drop if weight == .

* Summarize the matching output, then bring back every panel row of the
* retained firms
summarize pscore support weight id n1 nn, detail
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta", ///
    keep(match) nogenerate

* Write the matched sample to disk
save "$output\matched_sample_p2p_table6_FI.dta", replace

/* ============================================================================= */
/*                       TABLE 6 - No Investment (PSM Matching)                  */
/* ============================================================================= */

* ------------------------------------------------------------------------------
* Step 1: Load and filter dataset
* ------------------------------------------------------------------------------

use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

* Sample window: mdate strictly after 2013m12 and strictly before 2020m1
keep if mdate > tm(2013m12) & mdate < tm(2020m1)

* Restrict to firms that appear in liste_table6_p2p.dta
merge m:1 siren using "$input\liste_table6_p2p.dta", keep(match) nogenerate

* Keep non-P2P rows (p2p == 0) plus every row whose loantype is non-empty
* and is neither MA nor IM
keep if p2p == 0 | !inlist(loantype, "MA", "IM", "")

* loan = log(1 + sum of the five loan components)
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

* ------------------------------------------------------------------------------
* Step 2: Handle missing values in key financial variables
* ------------------------------------------------------------------------------

* For each ratio: m_ indicator of a system-missing value, then zero-fill
foreach v of varlist r3filled r13filled r5filled r_collateral_filled {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

* ------------------------------------------------------------------------------
* Step 3: Save auxiliary data for PSM
* ------------------------------------------------------------------------------

* Firm-level flag: codummy = 1 when co summed over rows with delta < 0 is positive
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

* One row per firm taken at delta == -11, joined with codummy
preserve
    keep if delta == -11
    keep siren outloanyear size r5filled m_r5filled industry ageatevent r3filled ///
	m_r3filled r13filled m_r13filled r_collateral_filled ///
	m_r_collateral_filled outsideloan
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_keep.dta", replace
restore

* ------------------------------------------------------------------------------
* Step 4: Reshape data for PSM
* ------------------------------------------------------------------------------

* Window delta in [-6, 0], re-indexed as delta2 in [0, 6]
drop datemonth
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta

duplicates drop siren delta2, force
keep siren p2p delta2 $celllist $ylist

* Diagnostic: rows per siren-delta2 pair (1 everywhere after the drop above)
bysort siren delta2: gen nb = _N
summarize nb, detail
drop nb

* Wide layout: one loan column per delta2 value
quietly reshape wide $ylist, i(siren p2p $celllist) j(delta2)

* Diagnostic: rows per firm after the reshape, then force one row per firm
bysort siren: gen nb = _N
summarize nb, detail
drop nb

duplicates drop siren, force

* Add the firm-level covariates; keep only firms found in both files
merge 1:m siren using "$output\psm_stable_keep.dta", keep(match) nogenerate

* Replace industry by consecutive integer codes
egen industry_code = group(industry)
drop industry
rename industry_code industry

* ------------------------------------------------------------------------------
* Step 5: Propensity Score Matching (PSM) using k-nearest neighbor
* ------------------------------------------------------------------------------

* Remove leftover variables whose names start with an underscore
* (psmatch2 writes its output into such variables)
capture drop _*

* Stop early when psmatch2 is unavailable. Without this check the capture
* below would hide the error, every cell would be skipped, and an empty
* matched sample would be saved without any warning.
capture which psmatch2
if _rc {
    display as error "psmatch2 not found; install it with: ssc install psmatch2"
    exit 199
}

* Create placeholders for matching results
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn  {
    gen `name' = .
}

* Create group identifier for matching (one cell per celllist combination)
egen cell = group($celllist)
quietly levelsof cell, local(gr)

foreach i of local gr {

    * Perform PSM matching within each cell:
    * loan0-loan5 enter as covariates, loan6 is the outcome
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlistworating ///
        if cell == `i', out(loan6) neighbor(5) common
    * store the return code at once, before another command resets it
    local rc = _rc

    if `rc' == 0 {
        display "Processing matching for cell `i'..."

        * Store matching results
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }

    } else {
        * report the actual return code instead of assuming the cause
        display "Skipping cell `i' (psmatch2 returned r(`rc'))"
    }
}

* ------------------------------------------------------------------------------
* Step 6: Save matched sample for further analysis
* ------------------------------------------------------------------------------

* Keep the matching output only for firms that received a weight
keep siren pscore support weight id n1-n5 nn
drop if weight == .

* Summarize the matching output, then bring back every panel row of the
* retained firms
summarize pscore support weight id n1 nn, detail
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta", ///
    keep(match) nogenerate

* Write the matched sample to disk
save "$output\matched_sample_p2p_table6_noFI.dta", replace

/* ========================================================= */
/*          TABLE DEFAULT - LOW RATE MATCHING SCRIPT         */
/* ========================================================= */

/* Load the firm-month panel */
use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

/* Sample window: mdate strictly after 2013m12 and strictly before 2020m1 */
keep if mdate > tm(2013m12) & mdate < tm(2020m1)

/* Restrict to firms listed in unrated_siren.dta, then to high_rate == 0 */
merge m:1 siren using "$output\unrated_siren.dta", keep(match) nogenerate
keep if high_rate == 0

/* loan = log(1 + sum of the five loan components) */
gen loan = stloan + ltloan + bm + bi + oc
replace loan = log(1 + loan)

/* For each ratio: m_ indicator of a system-missing value, then zero-fill */
foreach v of varlist r3filled r13filled r5filled r_collateral_filled {
    gen m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* Firm-level flag: codummy = 1 when co summed over rows with delta < 0
   is positive */
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    gen codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

/* One row per firm taken at delta == -11, joined with codummy; only firms
   present in both files are kept */
preserve
    keep if delta == -11
    keep siren outloanyear size r5filled m_r5filled industry ageatevent ///
         r3filled m_r3filled r13filled m_r13filled r_collateral_filled ///
         m_r_collateral_filled outsideloan
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
drop datemonth

/* Keep rows with delta in [-6, 0] and shift the index by 6 so it starts at 0 */
keep if inrange(delta, -6, 0)
generate delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren p2p delta2 $celllist $ylist

/* Diagnostic: number of rows per siren-delta2 pair */
bysort siren delta2: generate nb = _N
summarize nb, detail
drop nb

/* Wide layout: one column of each time-varying variable per delta2 value */
quietly reshape wide $ylist, i(siren p2p $celllist) j(delta2)

/* Add the stored firm-level covariates; keep firms present on both sides */
merge 1:m siren using "$output\psm_stable_keep.dta"
drop if _merge != 3
drop _merge

/* Replace industry by integer group codes */
egen indg = group(industry)
drop industry
rename indg industry

/* ========================================================= */
/*               PERFORM K-NEAREST NEIGHBOR MATCHING         */
/* ========================================================= */
/* Remove underscore-prefixed variables if any are in memory */
capture drop _*

/* Containers that collect the matching output cell by cell */
foreach stub in pscore support weight id n1 n2 n3 n4 n5 nn {
    generate `stub' = .
}

* We don't match on rating -> too restrictive given the sample size
egen cell = group($celllist)
quietly levels cell, local(gr)
foreach i of local gr {
    /* Nearest-neighbor matching (5 neighbors, common support) within the cell;
       treatment is p2p, outcome is loan6 */
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlistworating ///
        if cell == `i', out(loan6) neighbor(5) common

    if _rc != 0 {
        /* Any psmatch2 error ends up here; the cell keeps missing placeholders */
        display "Skipping cell `i' due to insufficient observations"
    }
    else {
        display "Processing cell `i'"
        /* Copy the underscore-prefixed psmatch2 output into the containers */
        foreach stub in pscore support weight id n1 n2 n3 n4 n5 nn {
            replace `stub' = _`stub' if cell == `i'
        }
    }
}

/* ========================================================= */
/*                  SAVE MATCHED SAMPLE                      */
/* ========================================================= */
/* Keep the firm identifier and the stored matching output; rows without a
   matching weight are discarded */
keep siren pscore support weight id n1-n5 nn
drop if weight == .
summarize pscore support weight id n1 nn, detail

/* Attach the matching output to every row of the same firm in the full panel */
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta"
drop if _merge != 3
drop _merge

save "$output\table_default_low_rate.dta", replace

	
/* ========================================================= */
/*          TABLE DEFAULT - HIGH RATE MATCHING SCRIPT        */
/* ========================================================= */

/* Start from the aggregated panel */
use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

/* Sample window: mdate strictly after the month of 31 Dec 2013 and
   strictly before the month of 1 Jan 2020 */
keep if mdate > mofd(date("20131231","YMD")) & mdate < mofd(date("20200101","YMD"))

/* Keep firms found in the unrated-siren file, then those with high_rate == 1 */
merge m:1 siren using "$output\unrated_siren.dta", keep(3) nogen
keep if high_rate == 1

/* Total loan amount across the five components, then log(1 + x) */
generate loan = stloan + ltloan + bm + bi + oc
replace loan = ln(1 + loan)

/* For each ratio: flag missing values in an m_ indicator, then set them to 0 */
foreach v of varlist r3filled r13filled r5filled r_collateral_filled {
    generate m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* codummy = 1 when the firm's co sums to a positive value over delta < 0 */
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    generate codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

/* Firm-level covariates taken from the delta == -11 row (one row per firm),
   combined with codummy and stored for the later merge */
preserve
    keep if delta == -11
    keep siren outsideloan outloanyear ageatevent size industry ///
         r3filled m_r3filled r5filled m_r5filled r13filled m_r13filled ///
         r_collateral_filled m_r_collateral_filled
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta"
    drop if _merge != 3
    drop _merge
    save "$output\psm_stable_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
drop datemonth

/* Keep rows with delta in [-6, 0] and shift the index by 6 so it starts at 0 */
keep if inrange(delta, -6, 0)
generate delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren p2p delta2 $celllist $ylist

/* Diagnostic: number of rows per siren-delta2 pair */
bysort siren delta2: generate nb = _N
summarize nb, detail
drop nb

/* Wide layout: one column of each time-varying variable per delta2 value */
quietly reshape wide $ylist, i(siren p2p $celllist) j(delta2)

/* Add the stored firm-level covariates; keep firms present on both sides */
merge 1:m siren using "$output\psm_stable_keep.dta"
drop if _merge != 3
drop _merge

/* Replace industry by integer group codes */
egen indg = group(industry)
drop industry
rename indg industry

/* ========================================================= */
/*               PERFORM K-NEAREST NEIGHBOR MATCHING         */
/* ========================================================= */
/* Remove underscore-prefixed variables if any are in memory */
capture drop _*

/* Containers that collect the matching output cell by cell */
foreach stub in pscore support weight id n1 n2 n3 n4 n5 nn {
    generate `stub' = .
}

* We don't match on rating -> too restrictive given the sample size
egen cell = group($celllist)
quietly levels cell, local(gr)
foreach i of local gr {
    /* Nearest-neighbor matching (5 neighbors, common support) within the cell;
       treatment is p2p, outcome is loan6 */
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlistworating ///
        if cell == `i', out(loan6) neighbor(5) common

    if _rc != 0 {
        /* Any psmatch2 error ends up here; the cell keeps missing placeholders */
        display "Skipping cell `i' due to insufficient observations"
    }
    else {
        display "Processing cell `i'"
        /* Copy the underscore-prefixed psmatch2 output into the containers */
        foreach stub in pscore support weight id n1 n2 n3 n4 n5 nn {
            replace `stub' = _`stub' if cell == `i'
        }
    }
}

/* ========================================================= */
/*                  SAVE MATCHED SAMPLE                      */
/* ========================================================= */
/* Keep the firm identifier and the stored matching output; rows without a
   matching weight are discarded */
keep siren pscore support weight id n1-n5 nn
drop if weight == .
summarize pscore support weight id n1 nn, detail

/* Attach the matching output to every row of the same firm in the full panel */
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta"
drop if _merge != 3
drop _merge

save "$output\table_default_high_rate.dta", replace
	

/* ========================================================= */
/*          NO MISSING - PROPENSITY SCORE MATCHING           */
/* ========================================================= */

/* Start from the aggregated panel */
use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

/* Sample window: mdate strictly after the month of 31 Dec 2013 and
   strictly before the month of 1 Jan 2020 */
keep if mdate > mofd(date("20131231","YMD")) & mdate < mofd(date("20200101","YMD"))

/* Total loan amount across the five components, then log(1 + x) */
generate loan = stloan + ltloan + bm + bi + oc
replace loan = ln(1 + loan)

/* ========================================================= */
/*               GENERATE CODUMMY VARIABLE                   */
/* ========================================================= */
/* codummy = 1 when the firm's co sums to a positive value over delta < 0 */
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    generate codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

/* Firm-level covariates taken from the delta == -11 row (one row per firm).
   No missing-value indicators are built in this section and the ratios are
   left as they are. */
preserve
    keep if delta == -11
    keep siren outsideloan outloanyear ageatevent size industry ///
         r3filled r5filled r13filled r_collateral_filled
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta"
    drop if _merge != 3
    drop _merge
    save "$output\psm_stable_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
drop datemonth

/* Keep rows with delta in [-6, 0] and shift the index by 6 so it starts at 0 */
keep if inrange(delta, -6, 0)
generate delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren p2p delta2 $celllist $ylist

/* Diagnostic: number of rows per siren-delta2 pair */
bysort siren delta2: generate nb = _N
summarize nb, detail
drop nb

/* Wide layout: one column of each time-varying variable per delta2 value */
quietly reshape wide $ylist, i(siren p2p $celllist) j(delta2)

/* Add the stored firm-level covariates; keep firms present on both sides */
merge 1:m siren using "$output\psm_stable_keep.dta"
drop if _merge != 3
drop _merge

/* Replace industry by integer group codes */
egen indg = group(industry)
drop industry
rename indg industry

/* ========================================================= */
/*               PERFORM K-NEAREST NEIGHBOR MATCHING         */
/* ========================================================= */
/* Remove underscore-prefixed variables if any are in memory */
capture drop _*

/* Containers that collect the matching output cell by cell */
foreach stub in pscore support weight id n1 n2 n3 n4 n5 nn {
    generate `stub' = .
}

/* One matching cell per combination of the cell-list variables */
egen cell = group($celllist)
quietly levels cell, local(gr)
foreach i of local gr {
    /* Nearest-neighbor matching (5 neighbors, common support) within the cell,
       using the covariate list without missing-value indicators */
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlist_nomissing ///
        if cell == `i', out(loan6) neighbor(5) common

    if _rc != 0 {
        /* Any psmatch2 error ends up here; the cell keeps missing placeholders */
        display "Skipping cell `i' due to insufficient observations"
    }
    else {
        display "Processing cell `i'"
        /* Copy the underscore-prefixed psmatch2 output into the containers */
        foreach stub in pscore support weight id n1 n2 n3 n4 n5 nn {
            replace `stub' = _`stub' if cell == `i'
        }
    }
}


/* ========================================================= */
/*                  SAVE MATCHED SAMPLE                      */
/* ========================================================= */
/* Keep the firm identifier and the stored matching output; rows without a
   matching weight are discarded */
keep siren pscore support weight id n1-n5 nn
drop if weight == .
summarize pscore support weight id n1 nn, detail

/* Attach the matching output to every row of the same firm in the full panel */
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta"
drop if _merge != 3
drop _merge

save "$output\matched_sample_no_missing.dta", replace

/* ========================================================= */
/*                      UNSECURED LOANS                      */
/* ========================================================= */

/* ========================================================= */
/*                    LOAD & PREPARE DATA                    */
/* ========================================================= */
use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

/* Sample window: mdate strictly after the month of 31 Dec 2013 and
   strictly before the month of 1 Jan 2020 */
keep if mdate > mofd(date("20131231","YMD")) & mdate < mofd(date("20200101","YMD"))

/* Unsecured sample: rows with contrancollateral == 0, plus all p2p == 1 rows */
keep if p2p == 1 | contrancollateral == 0

/* Total loan amount across the five components, then log(1 + x) */
generate loan = stloan + ltloan + bm + bi + oc
replace loan = ln(1 + loan)

/* ========================================================= */
/*         CREATE DUMMY VARIABLES FOR MISSING VALUES         */
/* ========================================================= */
/* For each ratio: flag missing values in an m_ indicator, then set them to 0 */
foreach v of varlist r3filled r13filled r5filled r_collateral_filled {
    generate m_`v' = (`v' == .)
    replace `v' = 0 if `v' == .
}

/* ========================================================= */
/*               GENERATE CODUMMY VARIABLE                   */
/* ========================================================= */
/* codummy = 1 when the firm's co sums to a positive value over delta < 0 */
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    generate codummy = (co > 0)
    drop co
    save "$output\psm_co_dummy.dta", replace
restore

/* Firm-level covariates (including rating) taken from the delta == -11 row,
   one row per firm, combined with codummy and stored for the later merge */
preserve
    keep if delta == -11
    keep siren rating outsideloan outloanyear ageatevent size industry ///
         r3filled m_r3filled r5filled m_r5filled r13filled m_r13filled ///
         r_collateral_filled m_r_collateral_filled
    duplicates drop siren, force
    merge 1:1 siren using "$output\psm_co_dummy.dta"
    drop if _merge != 3
    drop _merge
    save "$output\psm_stable_keep.dta", replace
restore

/* ========================================================= */
/*               PREPARE FOR NEAREST NEIGHBOR MATCHING       */
/* ========================================================= */
drop datemonth

/* Keep rows with delta in [-6, 0] and shift the index by 6 so it starts at 0 */
keep if delta <= 0 & delta >= -6
gen delta2 = delta + 6
drop delta
duplicates drop siren delta2, force

keep siren p2p delta2 $celllist $ylist

/* Diagnostic: number of rows per siren-delta2 pair */
sort siren delta2
bys siren delta2: gen nb = _N
su nb, d
drop nb

/* Wide layout: one column of each time-varying variable per delta2 value */
qui reshape wide $ylist, i(siren p2p $celllist) j(delta2)

/* Diagnostic: number of wide rows per firm */
sort siren
bys siren: gen nb = _N
su nb, d
drop nb

/* Force one row per firm so the 1:m merges on siren below are valid.
   NOTE(review): when a firm has several wide rows, the row that survives
   depends on the order left by the sort above - confirm this is acceptable. */
duplicates drop siren, force

/* Add the stored firm-level covariates; keep firms present on both sides */
merge 1:m siren using "$output\psm_stable_keep.dta"
keep if _merge == 3
drop _merge

/* Convert rating to integer group codes so it can enter the model as
   i.rating. group() is an egen function, so it has to be called through
   egen; the same generate/drop/rename pattern is used for industry below. */
egen ratingg = group(rating)
drop rating
rename ratingg rating

/* Replace industry by integer group codes */
egen indg = group(industry)
drop industry
rename indg industry

/* ========================================================= */
/*            K-NEAREST NEIGHBOR PROPENSITY SCORE MATCHING   */
/* ========================================================= */

/* Remove underscore-prefixed variables if any are in memory */
capture drop _*

/* Containers that collect the matching output cell by cell */
foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
    gen `name' = .
}

/* One matching cell per combination of the cell-list variables */
egen cell = group($celllist)
qui levels cell, local(gr)
foreach i of local gr {
    /* Nearest-neighbor matching (5 neighbors, common support) within the cell;
       treatment is p2p, outcome is loan6 */
    capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
        if cell == `i', out(loan6) neighbor(5) common 

    if c(rc) == 0 {
        display "Processing cell `i'"
        /* Copy the underscore-prefixed psmatch2 output into the containers */
        foreach var of varlist pscore support weight id n1-n5 nn {
            replace `var' = _`var' if cell == `i'
        }
        
        /* Visualize the matching outcome.
           capture alone would hide the balance table and any error message,
           so noisily is added: output is shown, and a failure in either
           diagnostic still does not stop the loop. */
        capture noisily psgraph, name(cell`i',replace) bin(30)
        capture noisily pstest loan0 loan1 loan2 $xlist
    }
    else {
        /* Any psmatch2 error ends up here; the cell keeps missing placeholders */
        display "Skipping cell `i' due to insufficient observations"
    }
}

/* ========================================================= */
/*                SAVE MATCHED SAMPLE                        */
/* ========================================================= */

/* Keep the firm identifier and the stored matching output; rows without a
   matching weight are discarded */
keep siren pscore support weight id n1-n5 nn
drop if weight == .
summarize pscore support weight id n1 nn, detail

/* Attach the matching output to every row of the same firm in the full panel */
merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta"
drop if _merge != 3
drop _merge

/* Write the matched panel to disk */
save "$output\matched_sample_nn_unsecured_outside_loan_1.dta", replace
